import jieba
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Load the whole novel into memory; the source file is GB18030-encoded.
with open("D:\\Python\\projects\\python_01\\files\\JourneytotheWest.txt", encoding="gb18030") as file:
    contents = file.read()

# Segment the text into a list of words with jieba's lcut.
words = jieba.lcut(contents)

# Map each word to the number of times it occurs in the text.
counts = {}
for word in words:
    # Tokens that are exactly one character long are left out of the tally.
    if len(word) != 1:
        counts.setdefault(word, 0)
        counts[word] += 1

# Rank the (word, count) pairs by count, most frequent first. The sort is
# stable, so words with equal counts keep their first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Keep at most the 50 most frequent words. Slicing (rather than indexing
# 0..49) avoids an IndexError when the text yields fewer than 50 words.
top_items = items[:50]
names = [word for word, _ in top_items]
dicts = [count for _, count in top_items]

# Visualization: build the bar chart once, after all the data is collected,
# instead of rebuilding it on every loop iteration.
my_style = LS('#333366', base_style=LCS)
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
# NOTE(review): the title says two-character words, but the counting step
# only excludes one-character tokens, so longer words can appear as well.
chart.title = '《西游记》文章中出现率前50的中文词组（2字）'
chart.x_labels = names
chart.add('', dicts)
chart.render_to_file('111.svg')
